# requests
# 前后端分离 前后端不分离
# json.loads()  json.dumps()
# import requests
#
# response = requests.get(url, headers=headers, params=params, proxies=proxies, allow_redirects=False)
# response = requests.post(url, headers=headers, data=data, json=json, proxies=proxies, allow_redirects=False)
# response.content.decode()
# response.status_code
# response.json()

# 反爬
# 请求头相关 ：User-Agent  Cookie  Secret-Key  Token(令牌 权限相关)
# 代理IP相关：私密代理 隧道代理
# 验证码（verification code）：代理IP绕过
# 请求频率过快：time.sleep()

# lxml与xpath    bs4与css selector
from lxml import etree

# Build an element tree from the markup and gather every <div> in the document.
root = etree.HTML("<html></html>")
parent_node_List = root.xpath("//div")

# Run both relative queries against each <div>: the link text first, then the
# href attribute. Each xpath() result is kept exactly as returned (a list of
# matches, per lxml's documented behaviour for node-set expressions).
for child_node in parent_node_List:
    title, url = (
        child_node.xpath(".//a/text()"),
        child_node.xpath(".//a/@href"),
    )


from bs4 import BeautifulSoup
# Parse the markup with BeautifulSoup (using the lxml parser) and select every
# <div> with a CSS selector.
root = BeautifulSoup("<html></html>", "lxml")
parent_node_List = root.select("div")
for child_node in parent_node_List:
    # Look the anchor up once instead of once per field. select_one() returns
    # None when nothing matches, so a <div> without an <a> is skipped rather
    # than raising AttributeError on `.text`.
    link = child_node.select_one("a")
    if link is None:
        continue
    title = link.text
    # .get() yields None for an <a> that has no href attribute, where
    # indexing with ["href"] would raise KeyError.
    url = link.get("href")


